********************************************************************************
* matching_worker_w_roa.do
* Purpose: ROBUSTNESS CHECK - Worker PSM using the ROA-matched firm list.
*
* Identical to baseline matching_worker.do in all eligibility and PSM settings,
* but uses firm_matched_w_roa_list.dta (from matching_firm_w_roa.do) instead of
* the baseline firm_matched_list.dta. Workers are matched within firm pairs
* that were themselves matched on ROA.
*
* Output: $data/worker_matched_w_roa.dta
********************************************************************************
* First and last pre-event year looped over in the matching section.
global start_year 2005
global end_year 2016

**# matching
* For each pre-event year y: load the worker file, attach the ROA-matched
* firm list, apply the eligibility filters, and match treated to control
* workers one-to-one on a propensity score within cells. One matched list is
* saved per year.
forvalues y = $start_year/$end_year {

	* load the worker file for year y; clear is the option -use- provides to
	* discard the data left in memory by the previous iteration
	use $data/worker_`y', clear
	
	gen year_prior	=	year
	// merge in matched firms (joinby keeps only rows found in both files)
	joinby entid_syn year_prior using $data/firm_matched_w_roa_list
	
	
	** Keep worker if earn more than ~4k CAD in real wage **
	gegen 	total_wage 		= sum(t4earn), by(casenum2019)
	gen		real_total_wage = total_wage/CPI_base_2011
	drop if real_total_wage < 3900 | mi(real_total_wage)
	
	* drop moonlighters
	drop if moonlighter == 1

	* drop workers with fewer than 4 years of tenure or with gaps in their employment
	drop 	if max_gap > 1 | mi(max_gap)					// drop workers with gaps in their employment
	gen 	present_at_treated = (`y' + 1 <= last_year_at_firm)
	drop 	if present_at_treated == 0 						// drop workers that leave firms at t = 0
	drop 	if tenure < 4									// drop workers with less than 4 years of tenure
	
	* the variables that define the matching cells must all be observed
	drop if mi(t1_age_recorded) | mi(t1_sex_recorded) | mi(naics2) | mi(OPAddressProvince)
	
	** worker age: 5 years bin **
	gen bin_age	=	int(( t1_age_recorded - 17)/5)
	
	* create interactions between bins
	* workers must be matched within the same cells
	gegen cell = group(naics2 OPAddressProvince t1_sex_recorded bin_*)
	drop if cell == .
	
	* estimate pscore using the eligible sample (linear regression on age squared)
	reg treated c.t1_age_recorded#c.t1_age_recorded
	
	predict pscore
	
	* shift the score by 10 per cell so that, with caliper(1) below, a match is
	* only found inside the same cell (assumes fitted values span less than 9)
	* NOTE(review): predict stores a float by default, so a large 10.0*cell
	* offset may absorb small score differences; predict double would avoid
	* this but is left unchanged to stay aligned with the baseline - confirm
	replace pscore = pscore + 10.0*cell
	
	** psmatch2 is given an outcome variable; a random draw is supplied here
	** NOTE(review): no seed is set in this file, so the draw differs between
	** runs unless a seed is set by the caller - confirm
	gen outcome = rnormal()
	
	*** Fix the sorting of data so that the match can be replicated
	gsort casenum2019
	
	* matching: one nearest neighbour, without replacement, caliper of 1
	psmatch2 treated, outcome(outcome) pscore(pscore) caliper(1) n(1) noreplacement
	sum outcome

	* id links the two members of a pair: a matched control carries its own
	* _id, a matched treated worker carries _n1 (the _id of its neighbour)
	gen 	id = _id if treated==0 & _weight==1
	replace id = _n1 if treated==1 & _weight==1
	
	* replace the existing pairid (presumably the firm-pair id from the firm
	* list - confirm) with a worker-pair id
	drop pairid
	gegen pairid = group(id)
	gsort pairid
	
	* unmatched workers have no id and are discarded
	drop if mi(id)
	keep casenum2019 entid_syn treated pairid year_prior
	
	save $data/worker_matched_w_roa_list_`y',  replace 
	
}

* Stack the yearly matched-worker lists into one file, printing each year
* as it is appended.
drop _all
forvalues y = $start_year/$end_year {
	display "`y'"
	append using $data/worker_matched_w_roa_list_`y', force keep(casenum2019 entid_syn treated pairid year_prior)
}

compress
save $data/worker_matched_w_roa_list, replace

**# matched worker panel
* For every calendar year 2001-2017, keep the records of matched workers and
* reduce them to one row per worker, cohort (year_prior) and pair. The yearly
* pieces are written to intermediate files carrying the _w_roa suffix, so
* that they cannot overwrite or erase un-suffixed files of the same name in
* the shared data folder, then stacked and removed.
forvalues y = 2001/2017 {

	use $data/worker_`y', clear
	
	* set aside the employer of this record; after the join entid_syn holds
	* the matched firm from the worker list
	rename entid_syn temp_id
	
	* keep matched treated and control workers
	joinby casenum2019 	using $data/worker_matched_w_roa_list, unmatched(none)
	* NOTE(review): this merge adds no variables (keepusing names only the key)
	* and keep(1 3) drops no master rows; it presumably serves only as a check
	* that first_mna is unique on entid_syn - confirm
	merge m:1 entid_syn using $data/first_mna, keep(1 3) keepusing(entid_syn) nogen

	** keep workers at dominant (M&A) firms **
	* same_firm flags the record at the matched firm; entid_syn is then
	* restored to the employer of the record
	gen 	same_firm = (temp_id == entid_syn)
	drop 	entid_syn
	rename	temp_id entid_syn
	
	* earnings summed over all records of the worker within this year
	gegen total_wage = sum(t4earn), by(casenum2019 pairid year_prior)
	
	* keep one record per worker, cohort and pair: the matched-firm record
	* first, otherwise the record with the highest t4earn
	gsort casenum2019 year_prior -same_firm -t4earn entid_syn
	duplicates drop casenum2019 year_prior pairid, force
	
	drop same_firm
	
	compress
	save $data/worker_matched_w_roa_`y', replace
}

* stack the yearly pieces and remove the intermediate files
drop _all
forvalues y = 2001/2017 {
	append using $data/worker_matched_w_roa_`y', force
	erase $data/worker_matched_w_roa_`y'.dta
}

save $data/worker_matched_w_roa_intermid, replace

**# matched worker panel
* Order rows by pair, cohort, treated before control, then calendar year.
gsort pairid year_prior -treated year

** ID variables
* id identifies a worker within a cohort (year_prior)
gegen worker_id = group(casenum2019)
gegen firm_id = group(entid_syn)
gegen id = group(worker_id year_prior)

**# Time Variables
* event time: t = 0 is the year after year_prior; values beyond +/-5 are
* pooled into the end bins 6 and -6
gen t = year - (year_prior + 1)
replace t = 6 if t > 5 & !missing(t)
replace t = -6 if t < -5 & !missing(t)

* one dummy per event-time level, labelled with i - 7
* (the label equals the event time when all 13 levels -6..6 are present)
tabulate t, generate(ds_)
levelsof t, local(ts)
local end = r(r)
forvalues i = 1/`end' {
	local event_time = `i' - 7
	label variable ds_`i' "`event_time'"
}

* ds_6 is labelled -1; it is set to zero everywhere, presumably so that it
* serves as the omitted reference period - confirm
replace ds_6 = 0

**# Sector
* Rebuild naics2 from the numeric industry code and pool selected codes.
destring naics, replace
drop naics2
gen naics2 = int(naics/100)
replace naics2 = 31 if inlist(naics2, 32, 33)
replace naics2 = 44 if naics2 == 45
replace naics2 = 48 if naics2 == 49
replace naics2 = 54 if inlist(naics2, 56, 61, 62)

* point Stata at the project ado folder and load the gtools Mata library
sysdir set PLUS "Z:\VRDC-PROJ-6730\Moon_6730\ado"
run "Z:\VRDC-PROJ-6730\Moon_6730\ado\_\_gtools_internal.mata"

** identify the sector prior to the event
* values observed in the year_prior row of each id
gen naics2_event = naics2 if treated == 1 & year == year_prior
gen naics_event = naics if year == year_prior
gen firm_event = firm_id if year == year_prior
gegen naics_tmp = firstnm(naics_event), by(id)
gegen firmid_tmp = firstnm(firm_event), by(id)

* rows at the year_prior firm, up to one year after year_prior, take that
* firm's year_prior industry code
replace naics = naics_tmp if firm_id == firmid_tmp & year <= year_prior + 1

** merge in mna characteristics
* Attach deal attributes by firm; rows whose firm is not in first_mna are kept.
merge m:1 entid_syn using $data/first_mna, keep(1 3) keepusing(DEAL Acquirer other_party_id merger) nogen

* For treated workers, keep the deal attributes on the t == -1 row only.
* NOTE(review): rows of control workers are not blanked here - confirm intent
foreach v of varlist DEAL Acquirer merger {
	replace `v' = . if treated == 1 & t != -1
}
replace other_party_id = "" if treated == 1 & t != -1

**# Transition Dummies
* Order each id (worker within cohort) chronologically; the by-prefixed
* statements and the [_n-1] / [_n+1] references below rely on this order.
gsort id year

** Moved
* year_diff: years elapsed since the previous row of the same id
* (missing on the first row of each id)
by id: 	gen year_diff	= year - year[_n-1]
* moved = 1 when the firm differs from the previous row or at least one year
* is skipped; it is 0 on the first row of each id
by id: 	gen moved 		= (firm_id ~= firm_id[_n-1] | year_diff >= 2) & ~mi(year_diff)

* count of moves and any-move indicator over rows with t > 0
gegen	total_moves			= total(moved * (t > 0) ), 			by(id)
gegen	ever_moved_post		= max(moved * (t > 0)), 			by(id)

* year of the first move with t > 0, and the same in event time
* NOTE(review): because of the if qualifier the value is presumably filled
* only on rows with moved == 1 & t > 0 - confirm against gegen documentation
gegen	first_move_year		= min(year) if moved == 1 & t > 0, 	by(id)
gen		t_moved				= first_move_year	-	(year_prior + 1)

** Moved Sector
* on move rows, 1 if naics2 differs from the previous row; 0 on all other rows
by id:	gen moved_sec= (naics2 ~= naics2[_n-1])		if moved == 1
replace 	moved_sec	=	0 if mi(moved_sec)

** Moved Industry
* on move rows, 1 if naics differs from the previous row; 0 on all other rows
by id:	gen moved_ind	= (naics ~= naics[_n-1])	if moved == 1
replace 	moved_ind		=	0 if mi(moved_ind)

** fired vs. quit
* fired = 1 for reason codes 1, 7 or 11 and 0 for any other non-missing
* reason (the meaning of the codes is not defined in this file)
gen 	fired	=	1	if reason == 1	| reason == 7 | reason == 11
replace fired 	= 	0	if fired ~= 1 & ~mi(reason)
* blank fired when the next row of the same id is not a move
by id:	replace fired	=	.	if moved[_n+1] == 0
* blank fired unless the next row is the first move with t > 0
* NOTE(review): this statement has no by prefix, so [_n+1] on the last row of
* an id refers to the first row of the next id - confirm this is intended
replace fired	=	.	if first_move_year[_n+1]	~=	year[_n+1]
* fired is kept for treated workers only
replace fired	=	.	if treated == 0

**# pair-level variables
* Within each pair and cohort, treated rows come first, then calendar year.
gsort pairid year_prior -treated year

* first non-missing Acquirer and ever_moved_post value within pair and cohort
gegen matched_acq = firstnm(Acquirer), by(pairid year_prior)
gegen matched_emoved = firstnm(ever_moved_post), by(pairid year_prior)

** staying workers without any gap in their employment
* original_firm: the firm observed in year_prior, kept only on rows at that firm
gen original_firm_tmp = firm_id if year == year_prior
gegen original_firm = firstnm(original_firm_tmp), by(id)
replace original_firm = . if original_firm != firm_id

* present_at_firm = 1 on rows at the original firm, provided that rows with
* t > 0 are not flagged as a move; missing otherwise
gen present_at_firm = 1 if original_firm == firm_id & (t <= 0 | (t > 0 & moved == 0))

compress
save $data/worker_matched_w_roa, replace